# -*- coding: utf-8 -*-
from datetime import datetime

from scrapy import Request
from scrapy import FormRequest
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest

# NOTE: the relative order of the imports below is kept as-is on purpose; the
# wildcard import may rebind names that were imported above it.
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from ispacechina.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full-catalogue spider for mall.ispacechina.com.

    Crawl flow, as implemented below:

    1. ``start_requests`` fetches the root categories.
    2. ``parse_root_cat`` emits them and requests the children of each one.
    3. ``parse_sub_cat`` emits the child categories and, for every level-3
       category whose 3-character id prefix is listed in ``ROOT_CAT_LIMIT``,
       requests page 1 of its SKU list: once per price band when the category
       is listed in ``SPECIAL_LADDERS``, otherwise once without price filter.
    4. ``parse_sku_page`` / ``parse_sku_page_with_price`` handle page 1 and
       request the remaining pages, which are handled by ``parse_item``.

    NOTE(review): ``Box`` and the module-level ``parse_*`` functions are not
    imported explicitly in this file; presumably they come from the wildcard
    import of ``ispacechina.rules`` -- confirm. ``self.batch_no`` and
    ``self.error_back`` are presumably provided by ``BaseSpider`` -- confirm.
    """

    name = 'full'
    # Frequently used endpoints.
    root_cat_url = 'https://mall.ispacechina.com/rest/service/routing/nouser/qryBusiCommodityCatalogService?upperCatalogId=0&channelId=2&pageNo=1&pageSize=1000'
    sub_cat_url = 'https://mall.ispacechina.com/rest/service/routing/nouser/qryOthLevelCommodityCatalogService?upperCatalogId={}&channelId=2&pageNo=1&pageSize=1000'
    sku_list_url = 'https://mall.ispacechina.com/rest/service/routing/nouser/searchBarEsService'

    # Values sent in the ``area`` form field of SKU-list requests.
    # NOTE(review): a category without price ladders is requested with
    # ``_AREA_PLAIN_FIRST_PAGE`` on page 1 but with ``_AREA_DEFAULT`` on every
    # following page. This mismatch is preserved exactly as found; confirm
    # which value is intended.
    _AREA_DEFAULT = '1_2800_2848'
    _AREA_PLAIN_FIRST_PAGE = '1_72_2799'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Create the batch record and load the crawl limits.

        :param batchNo: batch number forwarded to ``BaseSpider.__init__``.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        self.page_size = 50
        # Highest page number before 10000 results are exceeded. Floor
        # division gives the same integer as math.floor(10000 / page_size)
        # without relying on ``math`` being in scope.
        # NOTE(review): 10000 is presumably the result-window cap of the
        # search service -- confirm.
        self.page_limit = 10000 // self.page_size
        settings = get_project_settings()
        self.root_cat_limit = settings.get('ROOT_CAT_LIMIT', [])
        self.special_ladders = settings.get('SPECIAL_LADDERS', {})

    # ------------------------------------------------------------------
    # Request builders
    # ------------------------------------------------------------------
    def _catalog_request(self, url, callback):
        """Build a priority-100 catalogue request for *url*."""
        return Request(
            url=url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            callback=callback,
            errback=self.error_back,
            priority=100,
        )

    def _sku_list_request(self, cat_id, cat_name, page, callback, area,
                          price_range=None, with_user_id=False):
        """Build one SKU-list form request.

        :param cat_id: category id sent as ``categoryId``.
        :param cat_name: category name, only carried along in ``meta``.
        :param page: 1-based page number.
        :param callback: spider method that handles the response.
        :param area: value of the ``area`` form field.
        :param price_range: optional ``(min_price, max_price)`` pair; when
            given, the price filter fields are added to the form and the
            bounds are stored in ``meta`` as ``minPrice`` / ``maxPrice``.
        :param with_user_id: add an empty ``userId`` form field (the
            first-page requests send it, the follow-up pages do not).
        :return: the ``FormRequest`` (never de-duplicated, priority 100).
        """
        formdata = {
            'queryLocation': '2',
            'categoryId': cat_id,
            'brandId': '',
            'supplierId': '',
            'orderByColumn': '0',
            'orderType': '0',
            'pageSize': str(self.page_size),
            'pageNo': str(page),
            'area': area,
            'isPraise': '1',
            'shoppingType': '3',
            'searchType': '2',
            'level': '3',
        }
        meta = {
            'reqType': 'sku',
            'batchNo': self.batch_no,
            'pageSize': self.page_size,
            'pageNo': page,
            'catalogId': cat_id,
            'catalogName': cat_name,
        }
        # Optional fields are appended in the same order the hand-written
        # requests used: userId first, then the price filter.
        if with_user_id:
            formdata['userId'] = ''
        if price_range is not None:
            min_price, max_price = price_range
            formdata['minSalesPrice'] = str(min_price)
            formdata['maxSalesPrice'] = str(max_price)
            meta['minPrice'] = min_price
            meta['maxPrice'] = max_price

        return FormRequest(
            url=self.sku_list_url,
            formdata=formdata,
            meta=meta,
            callback=callback,
            errback=self.error_back,
            priority=100,
            dont_filter=True,
        )

    def _remaining_page_requests(self, cat_id, cat_name, total, price_range=None):
        """Yield the SKU-list requests for pages 2..*total* (inclusive)."""
        for page in range(2, total + 1):
            yield self._sku_list_request(
                cat_id, cat_name, page, self.parse_item, self._AREA_DEFAULT,
                price_range=price_range,
            )

    # ------------------------------------------------------------------
    # Crawl entry point and callbacks
    # ------------------------------------------------------------------
    def start_requests(self):
        """Start the crawl with the root-category request."""
        yield self._catalog_request(self.root_cat_url, self.parse_root_cat)

    def parse_root_cat(self, response):
        """Emit the root categories and request the children of each one."""
        cats = parse_root_cat(response)
        if not cats:
            return

        self.logger.info('主分类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in cats:
            yield self._catalog_request(
                self.sub_cat_url.format(cat.get('catalogId')),
                self.parse_sub_cat,
            )

    def parse_sub_cat(self, response):
        """Emit the sub-categories and request page 1 of the selected ones.

        Only level-3 categories whose first three id characters appear in
        ``self.root_cat_limit`` are crawled.
        """
        # Handle the category list.
        cats = parse_sub_cat(response)
        if not cats:
            return

        self.logger.info('子分类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in cats:
            cat_id = cat.get('catalogId')
            cat_name = cat.get('catalogName')
            if cat.get('level') != 3 or cat_id[0:3] not in self.root_cat_limit:
                continue

            if cat_id in self.special_ladders:
                # Level-3 category split into price bands: consecutive ladder
                # values form the (min, max) bounds of each band.
                # NOTE(review): a category configured with fewer than two
                # ladder values produces no request at all, so it is skipped
                # silently (unchanged behaviour) -- confirm this is intended.
                ladders = self.special_ladders.get(cat_id) or []
                for band in zip(ladders, ladders[1:]):
                    yield self._sku_list_request(
                        cat_id, cat_name, 1, self.parse_sku_page_with_price,
                        self._AREA_DEFAULT,
                        price_range=band,
                        with_user_id=True,
                    )
            else:
                # Plain level-3 category, no price filter.
                yield self._sku_list_request(
                    cat_id, cat_name, 1, self.parse_sku_page,
                    self._AREA_PLAIN_FIRST_PAGE,
                    with_user_id=True,
                )

    # Handle the first page (no price filter).
    def parse_sku_page(self, response):
        """Handle page 1 of a category and request the remaining pages.

        Emits the SKUs/items of this page, then the per-category page-count
        record returned by the module-level ``parse_sku_page``, then one
        request per following page.
        """
        meta = response.meta
        cur_page = meta.get('pageNo')
        cat_id = meta.get('catalogId')
        cat_name = meta.get('catalogName')

        # Products on this page.
        sku_list, item_list = parse_item(response)
        if sku_list and item_list:
            self.logger.info('清单1: cat=%s, page=%s, cnt=%s', cat_id, cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
            yield Box('item', self.batch_no, item_list)
        else:
            self.logger.info('分页为空1: cat=%s, page=%s', cat_id, cur_page)

        # Total page count.
        cat_sku_cnt = parse_sku_page(response)
        if not cat_sku_cnt:
            return

        # A missing page count is treated as "no further pages" instead of
        # failing on the comparison / range() below.
        total = cat_sku_cnt.get('totalPage') or 0
        self.logger.info('页数1: cat=%s, total=%s', cat_id, total)
        # Record the category's product totals.
        yield cat_sku_cnt

        if total > self.page_limit:
            # Only logged: pages beyond the limit are still requested.
            self.logger.info('页数超限1: cat=%s[%s], total=%s', cat_id, cat_name, total)

        # Paginate the product list.
        for request in self._remaining_page_requests(cat_id, cat_name, total):
            yield request

    # Handle the first page (with price filter).
    def parse_sku_page_with_price(self, response):
        """Handle page 1 of one price band and request its remaining pages.

        Same as ``parse_sku_page`` except that the price bounds from
        ``response.meta`` are forwarded to the follow-up requests and the
        page-count record is not emitted.
        """
        meta = response.meta
        cur_page = meta.get('pageNo')
        cat_id = meta.get('catalogId')
        cat_name = meta.get('catalogName')
        min_price = meta.get('minPrice')
        max_price = meta.get('maxPrice')

        # Products on this page.
        sku_list, item_list = parse_item(response)
        if sku_list and item_list:
            self.logger.info(
                '清单2: cat=%s, price=<%s, %s>, page=%s, cnt=%s',
                cat_id, min_price, max_price, cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
            yield Box('item', self.batch_no, item_list)
        else:
            self.logger.info(
                '分页为空2: cat=%s, price=<%s, %s>, page=%s',
                cat_id, min_price, max_price, cur_page)

        # Total page count.
        cat_sku_cnt = parse_sku_page(response)
        if not cat_sku_cnt:
            return

        # A missing page count is treated as "no further pages".
        total = cat_sku_cnt.get('totalPage') or 0
        self.logger.info(
            '页数2: cat=%s, price=<%s, %s>, total=%s',
            cat_id, min_price, max_price, total)

        if total > self.page_limit:
            # Only logged: pages beyond the limit are still requested.
            self.logger.info(
                '页数超限2: cat=%s[%s], price=<%s, %s>, total=%s',
                cat_id, cat_name, min_price, max_price, total)

        # Paginate the product list within the same price band.
        band = (min_price, max_price)
        for request in self._remaining_page_requests(cat_id, cat_name, total, price_range=band):
            yield request

    def parse_item(self, response):
        """Handle pages 2+ of a SKU list: emit the SKUs and items found."""
        meta = response.meta
        cur_page = meta.get('pageNo')
        cat_id = meta.get('catalogId')
        min_price = meta.get('minPrice', None)
        max_price = meta.get('maxPrice', None)

        # Process the products.
        sku_list, item_list = parse_item(response)
        if not (sku_list and item_list):
            self.logger.info('分页为空: cat=%s, page=%s', cat_id, cur_page)
            return

        # Use the priced log format whenever a bound is present, including a
        # bound of 0.
        if min_price is not None or max_price is not None:
            self.logger.info(
                '清单2: cat=%s, price=<%s, %s>, page=%s, cnt=%s',
                cat_id, min_price, max_price, cur_page, len(sku_list))
        else:
            self.logger.info('清单1: cat=%s, page=%s, cnt=%s', cat_id, cur_page, len(sku_list))
        yield Box('sku', self.batch_no, sku_list)
        yield Box('item', self.batch_no, item_list)
